// cd /projects/hsieh_project/proj_201809/code_2_202011/
// qstata city_new_ind_emp_share_top_mkt.do &

set linesize 255

// Start a fresh text log for this run; close any log left open beforehand.
capture log close
log using "/projects/hsieh_project/proj_201809/code_2_202011/city_new_ind_emp_share_top_mkt_log", replace text

clear all
cd "/projects/"

display "Started at $S_TIME $S_DATE"

// Run date formatted as YYMMDD.
global rev_date: display %tdYYNNDD date("$S_DATE", "DMY")
display "${rev_date}"

// ---- Directory layout -------------------------------------------------------
global dir_proj "/projects/hsieh_project/proj_201809/"

global dir_do   "${dir_proj}/code_2_202011/"
global dir_data "${dir_proj}/data/"
global dir_out  "${dir_proj}/output/202011_main/"
// mkdir is wrapped in capture so an already-existing directory is not an error.
noi capture mkdir "${dir_out}"
global dir_outf "${dir_out}/city_new_ind_emp_share_top_mkt/"
noi capture mkdir "${dir_outf}"

// ---- Comparison years -------------------------------------------------------
global year1 1977
global year2 2013

// ---- Top-firm percent cutoffs looped over below -----------------------------
global gl_perc "1 10"
local l_perc "1 10"

// ---- Labels for the ways of defining "top" firms ----------------------------
global lmkt_labels  "n_est n_msa1983"
global lmkt_labels2 "est msa1983"

// ---- Shared helper do-files (contents not visible from this file) -----------
do "/projects/hsieh_project/code_0_general/f_rounding.do"
do "/projects/hsieh_project/code_0_general/f_reg.do"

// Industry-level summary dataset merged in by read_city.
global ds_ind "${dir_data}/ind_sum_all"

//==============================================================================
/*
Author: Adarsh Kumar
Objective: Share of employment in each msa1983 of new industries in 2013 (compared to 1977), where share is calculated as:

1. Total Emp in new ind in city / Total Emp in City 
2. Total Emp of top 1,10% firms in new ind in city / Total Emp in City
3. Total Emp of top 1, 10% firms in new ind in city / Total Emp in new ind in city
4. Same as 3, but the denominator excludes city employment in those new industries that have zero employment in top 1%, 10% firms in the city.
*/


*Updating program previously written for top employment-firms 
*
* read_city top_mkt_var
*   Loads ${dir_data}/cityind_top_mkt_<top_mkt_var>_sum.dta, restricted to
*   $year1 and $year2, and leaves in memory one row per msa1983 for $year2 with:
*     - ln_emp_msa1983_1977 : ln of the city's total employment in $year1
*     - emps_cind_new       : new-industry employment / city employment
*     - for each P in $gl_perc, the share variables emps_cind_new_P,
*       emps_cind_new_iP_a, emps_cind_new_iP_n, emps_cind_new_iP_nP, their
*       ln_ counterparts, and the *_adj versions whose denominators drop
*       industries with fewer than 100/P firms (n_ind from ${ds_ind}).
*   An industry is "new" in a city when the city-industry cell appears only in
*   $year2 among the two years kept.
capture program drop read_city
program read_city
	args top_mkt_var
	local n_var = "n_`top_mkt_var'"

	use "${dir_data}/cityind_top_mkt_`top_mkt_var'_sum.dta", clear
	keep if year == $year1 | year == $year2
	tab year 

	// City-level total employment in each year.
	bys year msa1983: egen emp_msa1983 = total(emp_cind)

	// Base-year city employment, copied onto every row of the city. Only filled
	// when the city's earliest row really is $year1; otherwise the $year2 total
	// would silently be used as "base-year" employment.
	bys msa1983 (year): gen emp_msa1983_1977 = emp_msa1983[1] if year[1] == $year1

	// ind counts city-industry cells; ind_new flags cells present only in $year2.
	gen ind = 1
	bys msa1983 ch_ind: gen ind_new = 1 if _N == 1 & year == $year2
	tab ind_new

	*Merge ind_sum_all to get # firms per industry 
	// keep(master match): industry-year rows that exist only in the using file
	// have no msa1983 and would otherwise form a spurious group after collapse.
	merge m:1 year ch_ind using "${ds_ind}", nogen keepus(n_ind) keep(master match)

	gen emp_cind_new = emp_cind if ind_new == 1

	// Per-percentile variables to be summed in the collapse; built here so the
	// collapse works for any list in $gl_perc, not only values starting with 1.
	local sum_vars ""
	foreach i_perc in $gl_perc {
		// New industries with non-zero top-`i_perc'% employment in the city.
		gen ind_new_`i_perc' = 1 if ind_new == 1 & emp_ind_`n_var'_`i_perc' != 0
		gen emp_cind_new_`i_perc' = emp_cind if ind_new == 1 & emp_ind_`n_var'_`i_perc' != 0
		// Employment of the top-`i_perc'% firms themselves in those industries.
		gen emp_cind_new_i`i_perc' = emp_ind_`n_var'_`i_perc' if ind_new == 1 & emp_ind_`n_var'_`i_perc' != 0

		*Adjusting denominators to remove industries with < 100/i_perc firms 
		gen emp_cind_`i_perc' = emp_cind 
		gen emp_cind_new_`i_perc'_adj = emp_cind_new_`i_perc'
		// Adjusted total new-industry employment: denominator of the _n_adj share.
		gen emp_cind_newall_`i_perc'_adj = emp_cind_new
		replace emp_cind_`i_perc' = . if n_ind < (100 / `i_perc')
		replace emp_cind_new_`i_perc'_adj = . if n_ind < (100 / `i_perc')
		replace emp_cind_newall_`i_perc'_adj = . if n_ind < (100 / `i_perc')

		local sum_vars `sum_vars' ind_new_`i_perc' emp_cind_new_`i_perc' emp_cind_new_i`i_perc'
		local sum_vars `sum_vars' emp_cind_`i_perc' emp_cind_new_`i_perc'_adj emp_cind_newall_`i_perc'_adj
	}

	// Aggregate to city-year, then keep the end year only.
	collapse (sum) emp_cind emp_cind_new ind ind_new `sum_vars' (first) emp_msa1983_1977, by(year msa1983)
	keep if year == $year2
	gen ln_emp_msa1983_1977 = ln(emp_msa1983_1977)

	foreach i_perc in $gl_perc {
		gen ln_emps_cind_new_`i_perc' = ln(emp_cind_new_`i_perc'/emp_cind)
		gen ln_emps_cind_new_i`i_perc'_a = ln(emp_cind_new_i`i_perc'/emp_cind)
		gen ln_emps_cind_new_i`i_perc'_n = ln(emp_cind_new_i`i_perc'/emp_cind_new)
		gen ln_emps_cind_new_i`i_perc'_n`i_perc' = ln(emp_cind_new_i`i_perc'/emp_cind_new_`i_perc')

		// non-ln version
		gen emps_cind_new_`i_perc' = emp_cind_new_`i_perc'/emp_cind
		gen emps_cind_new_i`i_perc'_a = emp_cind_new_i`i_perc'/emp_cind
		gen emps_cind_new_i`i_perc'_n = emp_cind_new_i`i_perc'/emp_cind_new
		gen emps_cind_new_i`i_perc'_n`i_perc' = emp_cind_new_i`i_perc'/emp_cind_new_`i_perc'

		//Creating 'adjusted' version where denominators are adjusted for industries with too few firms for percentiles to work
		gen emps_cind_new_`i_perc'_adj = emp_cind_new_`i_perc'/emp_cind_`i_perc'
		gen emps_cind_new_i`i_perc'_a_adj = emp_cind_new_i`i_perc'/emp_cind_`i_perc'
		// _n_adj divides by ALL new-industry employment (net of too-small
		// industries); _n`i_perc'_adj divides by the subset with non-zero top-firm
		// employment. Previously both used the subset denominator and were identical.
		gen emps_cind_new_i`i_perc'_n_adj = emp_cind_new_i`i_perc'/emp_cind_newall_`i_perc'_adj
		gen emps_cind_new_i`i_perc'_n`i_perc'_adj = emp_cind_new_i`i_perc'/emp_cind_new_`i_perc'_adj
	}

	*Calculate 'regular' new-ind emp measure
	gen emps_cind_new = emp_cind_new / emp_cind 
end


* f_fig_wo_scatter y y_lab y_save note
*   Draws a local-polynomial fit with a 99% confidence band of variable `y'
*   against ln_emp_msa1983_1977 (no scatter points) and exports it as a PNG to
*   ${dir_outf}.
*     y      : name of the variable plotted on the y-axis (must be in memory)
*     y_lab  : graph title
*     y_save : tag inserted into the exported file name
*     note   : text appended after "99% CI. " in the graph note
* Dropped first (as read_city is) so the definition can be re-run in a session
* without hitting "already defined".
capture program drop f_fig_wo_scatter
program f_fig_wo_scatter
	args y y_lab y_save note

	twoway (lpolyci `y' ln_emp_msa1983_1977, level(99) clc(navy) clw(medthick) fc(none) alcolor(gs12)), ///
		ytitle("Share of employment") xtitle("ln(City Employment in ${year1})") ///
		title("`y_lab'") ///
		legend(off) ///
		note("99% CI. `note'")
	graph export "${dir_outf}/lpoly_`y_save'_X_ln_c_emp_${year1}_msa1983.png", replace width(3000) height(2000)

end

//==============================================================================
// Aggregate across all industries
// For each way of defining "top" firms in $lmkt_labels2: build the city-level
// shares with read_city, export one figure per share, then save the plotted
// data to ${dir_outf}/plotting_data_<mkt>.
foreach mkt in ${lmkt_labels2} {
		
	read_city "`mkt'"

	// NOTE(review): "cFUll" differs in case from the "cFull" tag used by the
	// other figures; left as is because existing files may reference this name.
	f_fig_wo_scatter "emps_cind_new" "Emp in New Ind / MSA1983 Emp" "emps_`mkt'_NewAll_cFUll" "Top firms defined by # `mkt'"
	
	foreach perc in $gl_perc {
		f_fig_wo_scatter "emps_cind_new_`perc'" "Emp in New Ind w/ non-zero top `perc'% firms / MSA1983 Emp" "emps_`mkt'_NewAll`perc'_cFull" "Top firms defined by # `mkt'"
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_a" "Emp in Top `perc'% Firms of New Ind / MSA1983 Emp" "emps_`mkt'_NewOnly`perc'_cFull" "Top firms defined by # `mkt'"
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_n" "Emp in Top `perc'% Firms of New Ind / New Ind MSA1983 Emp" "emps_`mkt'_NewOnly`perc'_cNew" "Top firms defined by # `mkt'"
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_n`perc'" "Emp in Top `perc'% Firms of New Ind / New Ind MSA1983 Emp *" "emps_`mkt'_NewOnly`perc'_cNew`perc'" "Top firms defined by # `mkt'. *Excludes New Ind with 0 top `perc'% emp in MSA1983."
		
		//"Adjusted" for too few firms in industry plots
		f_fig_wo_scatter "emps_cind_new_`perc'_adj" "Emp in New Ind w/ non-zero top `perc'% firms / MSA1983 Emp" "ADJ_emps_`mkt'_NewAll`perc'_cFull" "Top firms defined by # `mkt'. Industries with fewer than (100 / `perc') firms are not considered."
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_a_adj" "Emp in Top `perc'% Firms of New Ind / MSA1983 Emp" "ADJ_emps_`mkt'_NewOnly`perc'_cFull" "Top firms defined by # `mkt'. Industries with fewer than (100 / `perc') firms are not considered."
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_n_adj" "Emp in Top `perc'% Firms of New Ind / New Ind MSA1983 Emp" "ADJ_emps_`mkt'_NewOnly`perc'_cNew" "Top firms defined by # `mkt'. Industries with fewer than (100 / `perc') firms are not considered."
		
		f_fig_wo_scatter "emps_cind_new_i`perc'_n`perc'_adj" "Emp in Top `perc'% Firms of New Ind / New Ind MSA1983 Emp *" "ADJ_emps_`mkt'_NewOnly`perc'_cNew`perc'" "Top firms defined by # `mkt'. *Excludes New Ind with 0 top `perc'% emp in MSA1983. Industries with fewer than (100 / `perc') firms are not considered."
	}

	// Save everything that was plotted. emps_cind_new* covers the plain
	// new-industry share (first figure above) as well as every per-percentile
	// and *_adj share, for any $gl_perc; msa1983 is kept so rows stay identifiable.
	keep msa1983 ln_emp_msa1983_1977 emps_cind_new*
	save "${dir_outf}/plotting_data_`mkt'", replace
}
	





